# -- Data loading & subsampling -----------------------------------------------
# NOTE(review): rm(list = ls()) in scripts is discouraged (it only clears the
# global environment, not loaded packages or options); kept so the original
# workflow still starts from a clean workspace.
rm(list = ls())
load("alumni.Rdata")            # expects a data frame `alumni` in the .Rdata file
names(alumni)[4] <- "HABITAT"   # rename the 4th column
head(alumni)
# Random subsample of 4000 rows without replacement (keeps the model fast).
alumni <- alumni[sample(seq_len(nrow(alumni)), size = 4000, replace = FALSE), ]
dim(alumni)
## [1] 4000   10
#alumni$ANY = alumni$ANY - min(alumni$ANY)

# for(i in 0:2){
#   indices = which(alumni$ANY == i)
#   alumni$PCAT[indices] = scale(alumni$PCAT[indices], center = T, scale = F)
#   alumni$PCAST[indices] = scale(alumni$PCAST[indices], center = T, scale = F)
#   alumni$PANG[indices] = scale(alumni$PANG[indices], center = T, scale = F)
#   alumni$PMAT[indices] = scale(alumni$PMAT[indices], center = T, scale = F)
#   alumni$PCIEN[indices] = scale(alumni$PCIEN[indices], center = T, scale = F)
#   alumni$PUMAN[indices] = scale(alumni$PUMAN[indices], center = T, scale = F)
# }

library(fastDummies)
# One-hot encode the three categorical predictors; drop the first level of
# each (it becomes the reference category) and the original columns.
alumni <- dummy_cols(alumni,
                     select_columns = c("NATURALESA", "GENERE", "HABITAT"),
                     remove_first_dummy = TRUE,
                     remove_selected_columns = TRUE)

head(alumni)
# Separate dummy matrix for the territorial area, kept apart from `alumni`
# so it can be used later as the design matrix of the area effects (model 2).
areas <- dummy_cols(data.frame(Area = alumni$AREA_TERRITORIAL),
                    remove_first_dummy = TRUE,
                    remove_selected_columns = TRUE)
names(areas)
##  [1] "Area_Barcelona Comarques"             
##  [2] "Area_Barcelona II (Comarques)"        
##  [3] "Area_Catalunya Central"               
##  [4] "Area_Consorci d'Educació de Barcelona"
##  [5] "Area_Girona"                          
##  [6] "Area_Lleida"                          
##  [7] "Area_Maresme - Vallès Oriental"       
##  [8] "Area_Maresme Vallès Oriental"         
##  [9] "Area_Maresme-Vallès Oriental"         
## [10] "Area_Tarragona"                       
## [11] "Area_Terres de l'Ebre"                
## [12] "Area_Vallès Occidental"
# Number of area dummy columns (size of the random-effect vector G).
# NOTE(review): the three "Maresme ... Vallès Oriental" spellings listed above
# look like one area encoded inconsistently -- consider cleaning the levels.
m <- ncol(areas)
# model = lm(PMAT ~ . - PUMAN + PUMAN:GENERE, data=alumni)
# summary(model)
# plot(function(x){3 - 1*exp(-1*x)}, xlim=c(0,100), xlab="age", ylab="length")
# plot(function(x){3 - 1*exp(-0.1*x)}, xlim=c(0,100), add=TRUE, lty=2)
# plot(function(x){3 - 1*exp(-0.01*x)}, xlim=c(0,100), add=TRUE, lty=2)
# legend("bottomright",c("b2=1","b2=0.1","b2=0.01"), lty=c(1,2,3))

#in analisi descriptiva ver si las diferentes covariatas tienen: # - un pattern particular # - una varianza diferente # HACER PLOTS

#si en catalán subes de uno la nota es probable que la subas también en mate?

#el modelo con habitat tendría que ser arreglado con 2 dummies

# JAGS model 1: Bayesian linear regression of y (6th-grade maths mark) on the
# nine covariates x1..x9 with homoscedastic normal errors.
# Priors: independent normals with precision 1 (sd = 1) on every coefficient
# (b1..b4 centred at 1, the rest at 0) and a vague Gamma(0.001, 0.001) prior
# on the residual precision tau_y; sigma_y is the derived residual sd.
# NOTE(review): the posterior for b0 sits near -4.5, so the N(0, 1) prior on
# b0 may be mildly informative -- confirm this is intended.
mod.alumni <- "
model {

 for (i in 1:n) {
  y[i]~ dnorm(b0 + b1*x1[i] + b2*x2[i] + b3*x3[i] + b4*x4[i] + b5*x5[i] + b6*x6[i] + b7*x7[i] + b8*x8[i] + b9*x9[i], tau_y)
 }
 
 b0 ~ dnorm(0, 1)
 b1 ~ dnorm(1, 1)  
 b2 ~ dnorm(1, 1)
 b3 ~ dnorm(1, 1)
 b4 ~ dnorm(1, 1)
 b5 ~ dnorm(0, 1)
 b6 ~ dnorm(0, 1)
 b7 ~ dnorm(0, 1)
 b8 ~ dnorm(0, 1)
 b9 ~ dnorm(0, 1)

 tau_y ~ dgamma(0.001, 0.001)
 sigma_y <- 1/sqrt(tau_y)

}
"

library(R2jags)  # provides jags(); NOTE(review): not loaded anywhere else in this file

# MCMC settings
Iter  <- 5000
Burn  <- 0   # NOTE(review): no burn-in is discarded, so warmup iterations
             # enter the posterior summaries -- consider Burn > 0
Chain <- 2
Thin  <- 1   # thinning, to reduce autocorrelation of the simulations

# Data handed to JAGS: response, the nine predictors, and the sample size.
data <- with(alumni, list(
  y = PMAT_6, x1 = PMAT_4, x2 = PLENG_4, x3 = PLENG_6, x4 = PANG_4,
  x5 = PANG_6, x6 = NATURALESA_Pública, x7 = `HABITAT_Fins a 10000`,
  x8 = `HABITAT_Més de 100000`, x9 = GENERE_H, n = nrow(alumni)
))

parameters <- c("b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", "b8", "b9",
                "sigma_y")

# One over-dispersed starting point per chain.
initials <- list(
  list(b0 = 3, b1 = 1, b2 = 0.1, b3 = 0, b4 = 0, b5 = 0, b6 = 0, b7 = 0,
       b8 = 0, b9 = 0, tau_y = 4),
  list(b0 = 2, b1 = 1.5, b2 = 0.2, b3 = 0, b4 = 1, b5 = 1, b6 = 1, b7 = 1,
       b8 = 1, b9 = 1, tau_y = 2)
)

alumni.sim <- jags(data, inits = initials, parameters.to.save = parameters,
                   n.iter = (Iter * Thin + Burn), n.burnin = Burn,
                   n.thin = Thin, n.chains = Chain,
                   model = textConnection(mod.alumni))
## module glm loaded
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 4000
##    Unobserved stochastic nodes: 11
##    Total graph size: 47370
## 
## Initializing model
# Trace plots of every saved parameter, both chains overlaid (black/red).
traceplot(alumni.sim, mfrow = c(2, 2), varname = parameters,
          col = c("black", "red"), ask = FALSE)

print(alumni.sim, digits = 2)
## Inference for Bugs model at "4", fit using jags,
##  2 chains, each with 5000 iterations (first 0 discarded)
##  n.sims = 10000 iterations saved
##           mu.vect sd.vect     2.5%      25%      50%      75%    97.5% Rhat
## b0          -4.57    0.90    -6.30    -5.16    -4.56    -3.98    -2.87    1
## b1           0.56    0.02     0.53     0.55     0.56     0.57     0.59    1
## b2           0.18    0.02     0.14     0.17     0.18     0.20     0.23    1
## b3           0.01    0.02    -0.04    -0.01     0.01     0.02     0.05    1
## b4           0.35    0.02     0.32     0.34     0.35     0.36     0.38    1
## b5          -0.15    0.02    -0.19    -0.16    -0.15    -0.13    -0.10    1
## b6          -2.69    0.39    -3.45    -2.95    -2.69    -2.43    -1.93    1
## b7           0.50    0.50    -0.46     0.16     0.50     0.84     1.47    1
## b8          -0.72    0.39    -1.47    -0.98    -0.72    -0.45     0.03    1
## b9           4.38    0.38     3.64     4.12     4.38     4.63     5.11    1
## sigma_y     12.13    0.14    11.87    12.04    12.13    12.23    12.40    1
## deviance 31320.06    9.91 31301.59 31313.29 31319.75 31326.53 31340.43    1
##          n.eff
## b0       10000
## b1       10000
## b2        5200
## b3       10000
## b4       10000
## b5       10000
## b6        4200
## b7       10000
## b8       10000
## b9       10000
## sigma_y   2100
## deviance 10000
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
## 
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 49.1 and DIC = 31369.2
## DIC is an estimate of expected predictive error (lower deviance is better).
# Row index of the deviance entry, excluded from the interval plot below.
linea <- which(rownames(alumni.sim$BUGSoutput$summary) == "deviance")

# Plot the 2.5% (blue) and 97.5% (red) posterior quantiles per parameter;
# intervals straddling the dashed zero line flag coefficients whose sign is
# not resolved.
confid.inter <- alumni.sim$BUGSoutput$summary[-linea, c(3, 7)]
plot(seq_len(nrow(confid.inter)), confid.inter[, 1], col = "blue")
points(seq_len(nrow(confid.inter)), confid.inter[, 2], col = "red")
abline(h = 0, lty = 2)

# JAGS model 2: model 1 plus an additive area effect sum(G * geo[i, ]), where
# geo is the 0/1 area dummy matrix and the m coefficients G share a common
# precision tau_g (hierarchical shrinkage of area effects towards 0).
# sigma_g is the derived between-area sd; the commented lines show the
# fixed-precision (non-hierarchical) alternative that was tried.
mod.alumni.2 <- "
model {

 for (i in 1:n) {
  y[i]~ dnorm(b0 + b1*x1[i] + b2*x2[i] + b3*x3[i] + b4*x4[i] + b5*x5[i] + b6*x6[i] + b7*x7[i] + b8*x8[i] + b9*x9[i] + sum(G*geo[i,]), tau_y)
 }
 
 b0 ~ dnorm(0, 1)
 b1 ~ dnorm(1, 1)  
 b2 ~ dnorm(1, 1)
 b3 ~ dnorm(1, 1)
 b4 ~ dnorm(1, 1)
 b5 ~ dnorm(0, 1)
 b6 ~ dnorm(0, 1)
 b7 ~ dnorm(0, 1)
 b8 ~ dnorm(0, 1)
 b9 ~ dnorm(0, 1)

 tau_y ~ dgamma(0.001, 0.001)
 sigma_y <- 1/sqrt(tau_y)
 
 for(i in 1:m) {
 G[i] ~ dnorm(0, tau_g)
 # G[i] ~ dnorm(0, 0.001)
 }
 
 tau_g ~ dgamma(0.001, 0.001)
 sigma_g <- 1/sqrt(tau_g)
 # sigma_g=1
}
"

# MCMC settings (model 2)
Iter  <- 5000
Burn  <- 0   # NOTE(review): no burn-in discarded -- consider Burn > 0
Chain <- 2
Thin  <- 1   # thinning, to reduce autocorrelation of the simulations

# Model-1 data plus `geo`, the area dummy design matrix for the G effects;
# as.matrix() guarantees JAGS receives a numeric matrix rather than a
# data.frame.
data.2 <- list(
  y = alumni$PMAT_6, x1 = alumni$PMAT_4, x2 = alumni$PLENG_4,
  x3 = alumni$PLENG_6, x4 = alumni$PANG_4, x5 = alumni$PANG_6,
  x6 = alumni$NATURALESA_Pública, x7 = alumni$`HABITAT_Fins a 10000`,
  x8 = alumni$`HABITAT_Més de 100000`, x9 = alumni$GENERE_H,
  geo = as.matrix(areas), n = nrow(alumni), m = m
)

parameters.2 <- c("b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", "b8",
                  "b9", "sigma_y", "sigma_g", "G")

# One over-dispersed starting point per chain; all area effects start at 0.
initials.2 <- list(
  list(b0 = 3, b1 = 1, b2 = 0.1, b3 = 0, b4 = 0, b5 = 0, b6 = 0, b7 = 0,
       b8 = 0, b9 = 0, tau_y = 4, tau_g = 4, G = rep(0, m)),
  list(b0 = 2, b1 = 1.5, b2 = 0.2, b3 = 0, b4 = 1, b5 = 1, b6 = 1, b7 = 1,
       b8 = 1, b9 = 1, tau_y = 2, tau_g = 2, G = rep(0, m))
)

alumni.sim.2 <- jags(data.2, inits = initials.2,
                     parameters.to.save = parameters.2,
                     n.iter = (Iter * Thin + Burn), n.burnin = Burn,
                     n.thin = Thin, n.chains = Chain,
                     model = textConnection(mod.alumni.2))
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 4000
##    Unobserved stochastic nodes: 24
##    Total graph size: 99409
## 
## Initializing model
# Trace plots of every saved parameter of model 2, both chains overlaid.
traceplot(alumni.sim.2, mfrow = c(2, 2), varname = parameters.2,
          col = c("black", "red"), ask = FALSE)

print(alumni.sim.2, digits = 2)
## Inference for Bugs model at "5", fit using jags,
##  2 chains, each with 5000 iterations (first 0 discarded)
##  n.sims = 10000 iterations saved
##           mu.vect sd.vect     2.5%      25%      50%      75%    97.5% Rhat
## G[1]        -0.78    0.75    -2.47    -1.26    -0.66    -0.14     0.24    1
## G[2]         0.02    1.27    -2.62    -0.45     0.01     0.48     2.71    1
## G[3]        -1.02    0.90    -2.99    -1.64    -0.89    -0.22     0.20    1
## G[4]        -0.19    0.61    -1.63    -0.50    -0.06     0.17     0.86    1
## G[5]         0.04    0.56    -1.21    -0.22     0.05     0.37     1.12    1
## G[6]        -1.12    1.03    -3.39    -1.82    -0.93    -0.21     0.20    1
## G[7]        -0.38    0.60    -1.78    -0.73    -0.24     0.03     0.55    1
## G[8]        -0.01    1.27    -2.78    -0.47     0.00     0.48     2.67    1
## G[9]        -0.11    1.04    -2.52    -0.54    -0.02     0.37     1.96    1
## G[10]        0.65    0.93    -0.80     0.01     0.43     1.18     2.89    1
## G[11]       -0.60    1.29    -4.01    -1.08    -0.24     0.08     1.27    1
## G[12]       -0.91    0.76    -2.54    -1.42    -0.83    -0.25     0.13    1
## b0          -4.46    0.88    -6.20    -5.05    -4.47    -3.88    -2.76    1
## b1           0.56    0.02     0.53     0.55     0.56     0.58     0.60    1
## b2           0.19    0.02     0.14     0.17     0.19     0.20     0.23    1
## b3           0.01    0.02    -0.04    -0.01     0.01     0.02     0.05    1
## b4           0.35    0.02     0.32     0.34     0.35     0.36     0.38    1
## b5          -0.14    0.02    -0.19    -0.16    -0.14    -0.13    -0.10    1
## b6          -2.62    0.39    -3.40    -2.88    -2.61    -2.35    -1.85    1
## b7           0.59    0.52    -0.41     0.24     0.58     0.95     1.61    1
## b8          -0.66    0.46    -1.52    -0.98    -0.67    -0.36     0.27    1
## b9           4.37    0.37     3.64     4.11     4.37     4.62     5.10    1
## sigma_g      1.00    0.74     0.05     0.43     0.90     1.41     2.72    1
## sigma_y     12.12    0.14    11.85    12.03    12.12    12.21    12.39    1
## deviance 31310.47   11.56 31289.12 31302.35 31310.18 31318.08 31333.79    1
##          n.eff
## G[1]      4100
## G[2]     10000
## G[3]      4200
## G[4]     10000
## G[5]     10000
## G[6]      6100
## G[7]      2000
## G[8]      6900
## G[9]     10000
## G[10]     3300
## G[11]    10000
## G[12]     2000
## b0       10000
## b1       10000
## b2       10000
## b3       10000
## b4        3200
## b5        9500
## b6       10000
## b7       10000
## b8       10000
## b9       10000
## sigma_g   2900
## sigma_y  10000
## deviance  6300
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
## 
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 66.8 and DIC = 31377.3
## DIC is an estimate of expected predictive error (lower deviance is better).
# Row index of the deviance entry, excluded from the interval plot below.
linea <- which(rownames(alumni.sim.2$BUGSoutput$summary) == "deviance")

# 2.5% (blue) and 97.5% (red) posterior quantiles per parameter of model 2;
# the dashed zero line marks sign (in)significance.
confid.inter <- alumni.sim.2$BUGSoutput$summary[-linea, c(3, 7)]
plot(seq_len(nrow(confid.inter)), confid.inter[, 1], col = "blue")
points(seq_len(nrow(confid.inter)), confid.inter[, 2], col = "red")
abline(h = 0, lty = 2)

# JAGS model 3: model 1 plus the interaction term b10 * x3[i] * x9[i]
# (6th-grade language mark x gender dummy), with the same prior structure
# and an additional N(0, 1) prior on the interaction coefficient b10.
mod.alumni.3 <- "
model {

 for (i in 1:n) {
  y[i]~ dnorm(b0 + b1*x1[i] + b2*x2[i] + b3*x3[i] + b4*x4[i] + b5*x5[i] + b6*x6[i] + b7*x7[i] + b8*x8[i] + b9*x9[i] + b10*x3[i]*x9[i], tau_y)
 }
 
 b0 ~ dnorm(0, 1)
 b1 ~ dnorm(1, 1)  
 b2 ~ dnorm(1, 1)
 b3 ~ dnorm(1, 1)
 b4 ~ dnorm(1, 1)
 b5 ~ dnorm(0, 1)
 b6 ~ dnorm(0, 1)
 b7 ~ dnorm(0, 1)
 b8 ~ dnorm(0, 1)
 b9 ~ dnorm(0, 1)
 b10 ~ dnorm(0, 1)

 tau_y ~ dgamma(0.001, 0.001)
 sigma_y <- 1/sqrt(tau_y)

}
"

# MCMC settings (model 3: x3-by-x9 interaction)
Iter  <- 5000
Burn  <- 0   # NOTE(review): no burn-in discarded -- consider Burn > 0
Chain <- 2
Thin  <- 1   # thinning, to reduce autocorrelation of the simulations

# Same data as model 1; the x3*x9 interaction is formed inside the model.
data.3 <- with(alumni, list(
  y = PMAT_6, x1 = PMAT_4, x2 = PLENG_4, x3 = PLENG_6, x4 = PANG_4,
  x5 = PANG_6, x6 = NATURALESA_Pública, x7 = `HABITAT_Fins a 10000`,
  x8 = `HABITAT_Més de 100000`, x9 = GENERE_H, n = nrow(alumni)
))

parameters.3 <- c("b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", "b8",
                  "b9", "b10", "sigma_y")

# One over-dispersed starting point per chain.
initials.3 <- list(
  list(b0 = 3, b1 = 1, b2 = 0.1, b3 = 0, b4 = 0, b5 = 0, b6 = 0, b7 = 0,
       b8 = 0, b9 = 0, b10 = 0, tau_y = 4),
  list(b0 = 2, b1 = 1.5, b2 = 0.2, b3 = 0, b4 = 1, b5 = 1, b6 = 1, b7 = 1,
       b8 = 1, b9 = 1, b10 = 1, tau_y = 2)
)

alumni.sim.3 <- jags(data.3, inits = initials.3,
                     parameters.to.save = parameters.3,
                     n.iter = (Iter * Thin + Burn), n.burnin = Burn,
                     n.thin = Thin, n.chains = Chain,
                     model = textConnection(mod.alumni.3))
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 4000
##    Unobserved stochastic nodes: 12
##    Total graph size: 47942
## 
## Initializing model
# Trace plots of every saved parameter of model 3, both chains overlaid.
traceplot(alumni.sim.3, mfrow = c(2, 2), varname = parameters.3,
          col = c("black", "red"), ask = FALSE)

print(alumni.sim.3, digits = 2)
## Inference for Bugs model at "6", fit using jags,
##  2 chains, each with 5000 iterations (first 0 discarded)
##  n.sims = 10000 iterations saved
##           mu.vect sd.vect     2.5%      25%      50%      75%    97.5% Rhat
## b0          -4.05    0.89    -5.74    -4.63    -4.06    -3.47    -2.34    1
## b1           0.56    0.02     0.53     0.55     0.56     0.57     0.59    1
## b10          0.07    0.01     0.05     0.06     0.07     0.08     0.10    1
## b2           0.21    0.02     0.17     0.20     0.21     0.23     0.26    1
## b3          -0.03    0.02    -0.08    -0.05    -0.03    -0.02     0.01    1
## b4           0.35    0.02     0.32     0.34     0.35     0.36     0.38    1
## b5          -0.14    0.02    -0.18    -0.16    -0.14    -0.13    -0.10    1
## b6          -2.56    0.39    -3.32    -2.83    -2.56    -2.30    -1.82    1
## b7           0.53    0.50    -0.46     0.18     0.53     0.87     1.51    1
## b8          -0.69    0.39    -1.44    -0.96    -0.69    -0.42     0.08    1
## b9          -0.32    0.87    -2.00    -0.90    -0.33     0.26     1.37    1
## sigma_y     12.12    0.14    11.85    12.02    12.12    12.21    12.39    1
## deviance 31308.12    8.22 31293.33 31302.43 31307.69 31313.37 31325.38    1
##          n.eff
## b0       10000
## b1       10000
## b10      10000
## b2       10000
## b3       10000
## b4       10000
## b5       10000
## b6       10000
## b7        5600
## b8       10000
## b9       10000
## sigma_y  10000
## deviance 10000
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).
## 
## DIC info (using the rule, pD = var(deviance)/2)
## pD = 33.8 and DIC = 31341.9
## DIC is an estimate of expected predictive error (lower deviance is better).
# Row index of the deviance entry, excluded from the interval plot below.
linea <- which(rownames(alumni.sim.3$BUGSoutput$summary) == "deviance")
# 2.5% (blue) and 97.5% (red) posterior quantiles per parameter of model 3;
# the dashed zero line marks sign (in)significance.
confid.inter <- alumni.sim.3$BUGSoutput$summary[-linea, c(3, 7)]
plot(seq_len(nrow(confid.inter)), confid.inter[, 1], col = "blue")
points(seq_len(nrow(confid.inter)), confid.inter[, 2], col = "red")
abline(h = 0, lty = 2)